/*
Table 2: Heterogeneity in history-dependence by patient characteristics

- Merge in various characteristics at the patient level
- Run regression
*/

set more off

// Load the user-quarter choice panel, discarding whatever is in memory
use user_quarter_choice_cholesterol, clear


// focus on cholesterol: show the distribution of indications first,
// then retain only indication 5
tabulate indic_id
drop if indic_id != 5


// BASIC: LOOK AT IMPACT OF FIRST CHOICE ON LATER CHOICE
// first_q is the earliest quarter observed for each enrollee
egen first_q = min(quarter), by(enrolid)

// The first-quarter drug id is only needed to build chose_initial below,
// so it lives in temporary variables that are removed at the end
tempvar first_id_raw first_id
generate `first_id_raw' = generic_id if quarter == first_q
egen `first_id' = max(`first_id_raw'), by(enrolid)

// For generic and xr: take the value in the first quarter and spread it
// to every row of the same enrollee (creates first_generic and first_xr)
foreach attr in generic xr {
	generate first_`attr'_temp = `attr' if quarter == first_q
	egen first_`attr' = max(first_`attr'_temp), by(enrolid)
}


// quarters elapsed since the enrollee's first observed quarter
generate period = quarter - first_q

// 1 when the current row matches the first-quarter drug on id, generic and xr
generate chose_initial = (generic_id == `first_id') & (generic == first_generic) & (xr == first_xr)

drop `first_id_raw' `first_id'


// need name for overlap analysis: attach generic_name_mapping by generic_id
// and retain only the rows that matched
merge m:1 generic_id using generic_name_mapping, keep(match) nogenerate


// Prefix the currently chosen drug's fields with curr_ so they stay distinct
// from the same-named fields they are compared against after the join below
drop drugid
rename generic_id curr_generic_id
rename gennme curr_gennme
rename generic curr_generic
rename xr curr_xr

// user is in the dataset only based on first use
// joinby forms all pairings within enrolid-indic_id; unmatched rows are dropped
joinby enrolid indic_id using user_treatment_indicators_cholesterol


// filter out irrelevant entries
// 1) drop rows that are neither treatment nor control for that entry event,
//    then make treat a clean 0/1 indicator within the retained sample
drop if treat != 1 & control != 1
replace treat = 0 if control == 1

//replace treat_alt = 0 if control_alt == 1

// 2) quarter relative to the entry quarter
//    (the filter on pre-entry quarters is currently disabled)
generate entry_quarter = qofd(entry_date)
generate rel_quart = quarter - entry_quarter
//keep if rel_quart >= 0

// A) generate the outcomes
// 1 when the drug chosen in this quarter equals the entry drug on id, generic and xr
generate chose_entry_drug = (curr_generic_id == generic_id) & (curr_generic == generic) & (curr_xr == xr)
tabulate chose_entry_drug


// B) analyze
// is stickiness bigger for generic choice? (doesn't have to be)
generate treat_generic = treat * generic
generate treat_xr = treat * xr


// Does this need correcting?
// Outcome measured in the enrollee's first quarter (old code used rel_quart == 0;
// using the first choice is probably cleaner, otherwise some are not there in period 0)
generate initial = chose_entry_drug if quarter == first_q
// spread to all rows of the enrollee-entry pair
// (unnecessary? maybe only 1 entry_num per enrolid)
egen initial_choice = max(initial), by(enrolid entry_num)


// C) Merge in plan type (by enrollee and year); only matched rows are retained
merge m:1 enrolid year using plantype_info, keep(match) nogenerate

// freeze at the initial choice time: take plantyp in the first quarter
// and spread it over every row of the enrollee
generate tt = plantyp if quarter == first_q
egen initial_plan_type = max(tt), by(enrolid)


// restrict to initial plan types 4 and 6; ppo flags type 6
keep if inlist(initial_plan_type, 4, 6)
generate ppo = initial_plan_type == 6



// figure out health in each quarter
// NOTE(review): merge 1:1 requires enrolid-quarter to be unique in memory;
// the earlier joinby could create duplicates - confirm this holds

// Drug counts per enrollee-quarter. Rows found only in the using file are
// discarded; quarters with no record there get zero counts.
merge 1:1 enrolid quarter using "cholesterol_user_drugs_user_aggregate", keep(master match) nogenerate
replace distinct_drugs = 0 if distinct_drugs == .
replace non_cholesterol = 0 if non_cholesterol == .


// Inpatient events per enrollee-quarter, handled the same way
merge 1:1 enrolid quarter using "cholesterol_user_inpatient_events_quarter_aggregate", keep(master match) nogenerate
replace inpatient_events = 0 if inpatient_events == .
//replace heart_related = 0 if heart_related == .


// add on interaction variables
// enrollee-level attributes (age, sex, emprel, plantyp); matched rows only
merge m:1 enrolid using "all_enrolid_info", keep(match) nogenerate

// sex is stored as a string; "2" is coded as female here
generate female = sex == "2"

// create some variables fixed at start time:
// take the period-0 value, then carry it forward row by row within each
// enrollee (rows are ordered by period, so the period-0 row comes first)
sort enrolid period

generate inpatient_start = inpatient_events if period == 0
by enrolid: replace inpatient_start = inpatient_start[_n-1] if inpatient_start[_n-1] != .

generate other_drugs_start = non_cholesterol if period == 0
by enrolid: replace other_drugs_start = other_drugs_start[_n-1] if other_drugs_start[_n-1] != .



// some summary stats for what people look like when first starting
summ non_cholesterol if period == 0, detail
//summ heart_related if period == 0, detail

// age at first prescription = calendar year of the first quarter minus birth year
gen first_prescription_year = year(dofq(first_q))
gen start_age = first_prescription_year - dobyr
summ start_age if period == 0, detail

tab female if period == 0
tab inpatient_events if period == 0

// winsorize and take logs (big tail)
// Stata treats missing as larger than any number, so each upper cap must
// exclude missing values explicitly; otherwise a missing value would be
// overwritten with the cap instead of staying missing.
summ other_drugs_start, detail
replace other_drugs_start = r(p99) if other_drugs_start > r(p99) & !missing(other_drugs_start)
replace other_drugs_start = log(1+other_drugs_start)

// a few odd entries: cap age at 64, leaving unknown ages missing
replace start_age = 64 if start_age > 64 & !missing(start_age)



// Interact each patient characteristic with the treatment indicator
// (treat_*) and with the initial choice (choice_*)
local characteristics start_age female inpatient_start other_drugs_start ppo
foreach trait of local characteristics {
	generate treat_`trait' = treat * `trait'
	generate choice_`trait' = initial_choice * `trait'
}


keep if generic == 0 // brand drug entry events

// eststo appends to a session-wide list of stored estimates; clear it first
// so the exported table contains only the models estimated below
eststo clear

// Endogenous regressors: initial choice and its interactions.
// Instruments: treatment indicator and the matching interactions.
local endog_vars initial_choice choice_start_age choice_female choice_other_drugs_start choice_inpatient_start choice_ppo
local instruments treat treat_start_age treat_female treat_other_drugs_start treat_inpatient_start treat_ppo

// one 2SLS regression per horizon: 3, 6, 9, 12 and 15 quarters after entry
forvalues i=3(3)15 {

	display "Offset: `i' quarters"
	eststo: ivregress 2sls chose_entry_drug ///
	(`endog_vars' = `instruments') if rel_quart == `i'
}

// write all stored models to LaTeX, reporting only the endogenous regressors
esttab using table_2.tex, con r2 se label replace booktabs keep(`endog_vars') title(Regression table \label{tab1})
eststo clear



